{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Import dependencies\n",
    "==================="
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import scipy.io\n",
    "import numpy as np\n",
    "from random import shuffle\n",
    "import random\n",
    "import spectral\n",
    "import scipy.ndimage\n",
    "from skimage.transform import rotate\n",
    "import os\n",
    "import patch_size\n",
    "%matplotlib inline"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Load dataset\n",
    "==========="
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "DATA_PATH = os.path.join(os.getcwd(),\"Data\")\n",
    "input_mat = scipy.io.loadmat(os.path.join(DATA_PATH, 'Indian_pines.mat'))['indian_pines']\n",
    "target_mat = scipy.io.loadmat(os.path.join(DATA_PATH, 'Indian_pines_gt.mat'))['indian_pines_gt']"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Define global variables\n",
    "======================="
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "HEIGHT = input_mat.shape[0]\n",
    "WIDTH = input_mat.shape[1]\n",
    "BAND = input_mat.shape[2]\n",
    "PATCH_SIZE = patch_size.patch_size\n",
    "TRAIN_PATCH,TRAIN_LABELS,TEST_PATCH,TEST_LABELS = [],[],[],[]\n",
    "CLASSES = [] \n",
    "COUNT = 200 #Number of patches of each class\n",
    "OUTPUT_CLASSES = 16\n",
    "TEST_FRAC = 0.25 #Fraction of data to be used for testing"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "1"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "PATCH_SIZE"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": false
   },
   "source": [
    "Scale the input between [0,1]\n",
    "=========================="
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "input_mat = input_mat.astype(float)\n",
    "input_mat -= np.min(input_mat)\n",
    "input_mat /= np.max(input_mat)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Calculate the mean of each channel for normalization\n",
    "===================================================="
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "MEAN_ARRAY = np.ndarray(shape=(BAND,),dtype=float)\n",
    "for i in range(BAND):\n",
    "    MEAN_ARRAY[i] = np.mean(input_mat[:,:,i])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "def Patch(height_index,width_index):\n",
    "    \"\"\"\n",
    "    Returns a mean-normalized patch, the top left corner of which \n",
    "    is at (height_index, width_index)\n",
    "    \n",
    "    Inputs: \n",
    "    height_index - row index of the top left corner of the image patch\n",
    "    width_index - column index of the top left corner of the image patch\n",
    "    \n",
    "    Outputs:\n",
    "    mean_normalized_patch - mean normalized patch of size (PATCH_SIZE, PATCH_SIZE) \n",
    "    whose top left corner is at (height_index, width_index)\n",
    "    \"\"\"\n",
    "    transpose_array = np.transpose(input_mat,(2,0,1))\n",
    "    height_slice = slice(height_index, height_index+PATCH_SIZE)\n",
    "    width_slice = slice(width_index, width_index+PATCH_SIZE)\n",
    "    patch = transpose_array[:, height_slice, width_slice]\n",
    "    mean_normalized_patch = []\n",
    "    for i in range(patch.shape[0]):\n",
    "        mean_normalized_patch.append(patch[i] - MEAN_ARRAY[i]) \n",
    "    \n",
    "    return np.array(mean_normalized_patch)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": false
   },
   "source": [
    "Collect all available patches of each class from the given image\n",
    "================================================================"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "for i in range(OUTPUT_CLASSES):\n",
    "    CLASSES.append([])\n",
    "for i in range(HEIGHT - PATCH_SIZE + 1):\n",
    "    for j in range(WIDTH - PATCH_SIZE + 1):\n",
    "        curr_inp = Patch(i,j)\n",
    "        curr_tar = target_mat[i + int((PATCH_SIZE - 1)/2), j + int((PATCH_SIZE - 1)/2)]\n",
    "        if(curr_tar!=0): #Ignore patches with unknown landcover type for the central pixel\n",
    "            CLASSES[curr_tar-1].append(curr_inp)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "46\n",
      "1428\n",
      "830\n",
      "237\n",
      "483\n",
      "730\n",
      "28\n",
      "478\n",
      "20\n",
      "972\n",
      "2455\n",
      "593\n",
      "205\n",
      "1265\n",
      "386\n",
      "93\n"
     ]
    }
   ],
   "source": [
    "for c  in CLASSES:\n",
    "    print len(c)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Make a test split with 25% data from each class\n",
    "==============================================="
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "for c in range(OUTPUT_CLASSES): #for each class\n",
    "    class_population = len(CLASSES[c])\n",
    "    test_split_size = int(class_population*TEST_FRAC)\n",
    "        \n",
    "    patches_of_current_class = CLASSES[c]\n",
    "    shuffle(patches_of_current_class)\n",
    "    \n",
    "    #Make training and test splits\n",
    "    TRAIN_PATCH.append(patches_of_current_class[:-test_split_size])\n",
    "        \n",
    "    TEST_PATCH.extend(patches_of_current_class[-test_split_size:])\n",
    "    TEST_LABELS.extend(np.full(test_split_size, c, dtype=int))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "35\n",
      "1071\n",
      "623\n",
      "178\n",
      "363\n",
      "548\n",
      "21\n",
      "359\n",
      "15\n",
      "729\n",
      "1842\n",
      "445\n",
      "154\n",
      "949\n",
      "290\n",
      "70\n"
     ]
    }
   ],
   "source": [
    "for c in TRAIN_PATCH:\n",
    "    print len(c)\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Oversample the classes which do not have at least COUNT patches in the training set and extract COUNT patches\n",
    "============================================================================================================="
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "for i in range(OUTPUT_CLASSES):\n",
    "    if(len(TRAIN_PATCH[i])<COUNT):\n",
    "        tmp = TRAIN_PATCH[i]\n",
    "        for j in range(COUNT/len(TRAIN_PATCH[i])):\n",
    "            shuffle(TRAIN_PATCH[i])\n",
    "            TRAIN_PATCH[i] = TRAIN_PATCH[i] + tmp\n",
    "    shuffle(TRAIN_PATCH[i])\n",
    "    TRAIN_PATCH[i] = TRAIN_PATCH[i][:COUNT]\n",
    "    \n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "200\n",
      "200\n",
      "200\n",
      "200\n",
      "200\n",
      "200\n",
      "200\n",
      "200\n",
      "200\n",
      "200\n",
      "200\n",
      "200\n",
      "200\n",
      "200\n",
      "200\n",
      "200\n"
     ]
    }
   ],
   "source": [
    "for c in TRAIN_PATCH:\n",
    "    print len(c)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "TRAIN_PATCH = np.asarray(TRAIN_PATCH)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "TRAIN_PATCH = TRAIN_PATCH.reshape((-1,220,PATCH_SIZE,PATCH_SIZE))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "TRAIN_LABELS = np.array([])\n",
    "for l in range(OUTPUT_CLASSES):\n",
    "    TRAIN_LABELS = np.append(TRAIN_LABELS, np.full(COUNT, l, dtype=int))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Augment the data with random flipped and rotated patches\n",
    "========================================================"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# for i in range(OUTPUT_CLASSES):\n",
    "#     shuffle(CLASSES[i])\n",
    "#     for j in range(COUNT/2): #There will be COUNT/2 original patches and COUNT/2 randomly rotated/flipped patches of each class\n",
    "#         num = random.randint(0,2)\n",
    "#         if num == 0 :\n",
    "#             flipped_patch = np.flipud(CLASSES[i][j]) #Flip patch up-down\n",
    "#         if num == 1 :\n",
    "#             flipped_patch = np.fliplr(CLASSES[i][j]) #Flip patch left-right\n",
    "#         if num == 2 :\n",
    "#             no = random.randrange(-180,180,30)\n",
    "#             flipped_patch = scipy.ndimage.interpolation.rotate(CLASSES[i][j], no,axes=(1, 0), \n",
    "#                     reshape=False, output=None, order=3, mode='constant', cval=0.0, prefilter=False) #Rotate patch by a random angle\n",
    "#         TRAIN_PATCH.append(CLASSES[i][j])\n",
    "#         TRAIN_LABELS.append(i)\n",
    "#         TRAIN_PATCH.append(flipped_patch)\n",
    "#         TRAIN_LABELS.append(i)\n",
    "\n",
    "#     for j in range(COUNT/2,COUNT/2 + 100):\n",
    "#         num = random.randint(0,2)\n",
    "#         if num == 0 :\n",
    "#             flipped_patch = np.flipud(CLASSES[i][j])\n",
    "#         if num == 1 :\n",
    "#             flipped_patch = np.fliplr(CLASSES[i][j])\n",
    "#         if num == 2 :\n",
    "#             no = random.randrange(-180,180,30)\n",
    "#             flipped_patch = scipy.ndimage.interpolation.rotate(CLASSES[i][j], no, axes=(1, 0), reshape=False, output=None, order=3, mode='constant', cval=0.0, prefilter=False)\n",
    "#         TEST_PATCH.append(CLASSES[i][j])\n",
    "#         TEST_LABELS.append(i)\n",
    "#         TEST_PATCH.append(flipped_patch)\n",
    "#         TEST_LABELS.append(i)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2557\n",
      "3200\n"
     ]
    }
   ],
   "source": [
    "print len(TEST_PATCH)\n",
    "print len(TRAIN_PATCH)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Save the patches in segments\n",
    "================================="
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "1. Training data\n",
    "----------------"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0 1 2 3 4 5 6 7\n"
     ]
    }
   ],
   "source": [
    "for i in range(len(TRAIN_PATCH)/(COUNT*2)):\n",
    "    train_dict = {}\n",
    "    start = i * (COUNT*2)\n",
    "    end = (i+1) * (COUNT*2)\n",
    "    file_name = 'Train_'+str(PATCH_SIZE)+'_'+str(i+1)+'.mat'\n",
    "    train_dict[\"train_patch\"] = TRAIN_PATCH[start:end]\n",
    "    train_dict[\"train_labels\"] = TRAIN_LABELS[start:end]\n",
    "    scipy.io.savemat(os.path.join(DATA_PATH, file_name),train_dict)\n",
    "    print i,"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "2. Test data\n",
    "-------------"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "for i in range(len(TEST_PATCH)/(COUNT*2)):\n",
    "    test_dict = {}\n",
    "    start = i * (COUNT*2)\n",
    "    end = (i+1) * (COUNT*2)\n",
    "    file_name = 'Test_'+str(PATCH_SIZE)+'_'+str(i+1)+'.mat'\n",
    "    test_dict[\"test_patch\"] = TEST_PATCH[start:end]\n",
    "    test_dict[\"test_labels\"] = TEST_LABELS[start:end]\n",
    "    scipy.io.savemat(os.path.join(DATA_PATH, file_name),test_dict)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "8"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(TRAIN_PATCH)/(COUNT*2)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 2",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}
